{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Biomart example related to ensembl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from bioservices import BioMart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# biomart initialisation could be slow and fail... depending on connection and timeout\n",
    "# need to be fixed.\n",
    "s = BioMart()\n",
    "s.host = \"www.ensembl.org\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Ensembl example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'database': 'ensembl_mart_84',\n",
       "  'default': '1',\n",
       "  'displayName': 'Ensembl Genes 84',\n",
       "  'host': 'www.ensembl.org',\n",
       "  'includeDatasets': '',\n",
       "  'martUser': '',\n",
       "  'name': 'ENSEMBL_MART_ENSEMBL',\n",
       "  'path': '/biomart/martservice',\n",
       "  'port': '80',\n",
       "  'serverVirtualSchema': 'default',\n",
       "  'visible': '1'}]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[x for x in s.registry() if 'ensembl' in x['database']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[u'hsapiens_gene_ensembl']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# which dataset are we going to use ?\n",
    "[x for x in s.datasets('ENSEMBL_MART_ENSEMBL') if 'sapiens' in x.lower()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "dataset = 'hsapiens_gene_ensembl'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[u'clone_based_ensembl_gene_name',\n",
       " u'ensembl_gene_id',\n",
       " u'ensembl_transcript_id',\n",
       " u'ensembl_peptide_id',\n",
       " u'with_ox_clone_based_ensembl_transcript',\n",
       " u'clone_based_ensembl_transcript_name',\n",
       " u'with_ox_clone_based_ensembl_gene',\n",
       " u'ensembl_exon_id']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[x for x in s.filters(dataset).keys() if 'ensembl' in x.lower()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "s.new_query()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "s.add_dataset_to_xml('hsapiens_gene_ensembl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "s.add_filter_to_xml(\"ensembl_gene_id\", value=\"ESPN\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "xmlq = s.get_xml()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "res = s.query(xmlq)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "97"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#s.attributes(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
